Data summary

Author

Florencia Grattarola

Published

April 16, 2025

# Packages used throughout the report.
library(janitor)
library(sf)
# Switch off s2 spherical geometry for sf operations.
sf_use_s2(FALSE)
library(tmap)
# Start in interactive ('view') mode; the map section sets the mode explicitly
# before each rendering.
tmap_mode('view')
library(tidyverse)
# Render NA cells in kable tables as empty strings.
options(knitr.kable.NA = '')

Read data

# Load the metadata table; `guess_max` lets readr inspect up to 4000 rows
# before settling on column types.
metadata <- readr::read_csv(file = 'data/metadata.csv', guess_max = 4000)

Summary

Code
# Headline counts for the whole dataset, shown as a one-column table.
# `na.rm = TRUE` keeps a missing value from turning a kingdom tally into NA and
# from being counted as an extra source, taxon, country or territory.
metadata %>%
  summarise(
    `Number of records` = n(),
    `Number of sources` = n_distinct(source_name, na.rm = TRUE),
    `Number of taxa` = n_distinct(taxa, na.rm = TRUE),
    `Animalia records` = sum(kingdom == 'Animalia', na.rm = TRUE),
    `Plantae records` = sum(kingdom == 'Plantae', na.rm = TRUE),
    `Fungi records` = sum(kingdom == 'Fungi', na.rm = TRUE),
    Countries = n_distinct(gadm_level_0, na.rm = TRUE),
    `Sub-national territories` = n_distinct(gadm_level_1, na.rm = TRUE)
  ) %>%
  # t() turns the single summary row into one row per statistic.
  t() %>%
  `colnames<-`(c("N")) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
N
Number of records 3200
Number of sources 2091
Number of taxa 483
Animalia records 2189
Plantae records 899
Fungi records 105
Countries 170
Sub-national territories 223

Geographic coverage

Code
# sources per continent
# A source whose continent field lists several continents (separated by '|')
# is counted once in each of them, so the total row can exceed the number of
# distinct sources.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  group_by(continent) %>%
  # n_distinct() already yields 0 for a group with no non-missing source, so
  # no ifelse() guard is needed.
  summarise(n_sources = n_distinct(source_name, na.rm = TRUE)) %>%
  arrange(desc(n_sources)) %>%
  rename(`Number of sources` = n_sources) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of sources per continent
continent Number of sources
Europe 1439
Asia 272
Africa 143
South America 116
North America 112
Oceania 31
Antarctica 7
Total 2120
Code
# records per continent
# A record listing several continents (separated by '|') contributes one row
# to each of them.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  count(continent, name = 'Number of records') %>%
  arrange(desc(`Number of records`)) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of records per continent
continent Number of records
Europe 2182
Asia 470
Africa 205
North America 156
South America 150
Oceania 62
Antarctica 7
Total 3232
Code
# Europe
# Records per kingdom for rows whose continent list includes Europe.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  filter(continent == 'Europe') %>%
  count(kingdom, name = 'Number of records') %>%
  arrange(desc(`Number of records`)) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of records by taxonomic kingdom in Europe
kingdom Number of records
Animalia 1551
Plantae 549
Fungi 75
Protozoa 6
Chromista 1
Total 2182
Code
# Asia
# Records per kingdom for rows whose continent list includes Asia.
metadata %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  filter(continent == 'Asia') %>%
  count(kingdom, name = 'Number of records') %>%
  arrange(desc(`Number of records`)) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of records by taxonomic kingdom in Asia
kingdom Number of records
Animalia 277
Plantae 175
Fungi 18
Total 470
Code
# Germany
# The five classes with the most records among rows assigned to Germany. Rows
# without a class are ignored, and the total row sums only those five classes.
metadata %>%
  filter(!is.na(class), gadm_level_0 == 'Germany') %>%
  group_by(kingdom, class) %>%
  summarise(`Number of records` = n()) %>%
  ungroup() %>%
  arrange(desc(`Number of records`)) %>%
  slice_head(n = 5) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of records by taxonomic class in Germany
kingdom class Number of records
Animalia Insecta 544
Animalia Aves 35
Animalia Mammalia 28
Animalia Amphibia 27
Animalia Arachnida 27
Total - 661
Code
# USA
# Record counts per kingdom and taxa label for rows whose ISO-2 code is 'US'.
metadata %>%
  filter(iso_2 == 'US') %>%
  group_by(kingdom, taxa) %>%
  summarise(`Number of records` = n()) %>%
  arrange(desc(`Number of records`)) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of records by kingdom and taxa in the United States
kingdom taxa Number of records
Animalia Fauna 1
Fungi Fungi 1
Plantae Flora 1
Total - 3
Code
# Country polygons (level 0) used as the base layer of the map.
world <- geodata::world(resolution = 3, level = 0, path = 'data/')

# One row per ISO-3 code and source: a source listing several countries
# (separated by '|') is repeated for each of them.
sources_by_country <- metadata %>%
  separate_rows(iso_3, sep = '\\|') %>%
  select(iso_2, GID_0 = iso_3, source_name)

# Attach the sources to every country polygon and count the distinct sources
# per country. The join key is spelled out so the join cannot silently pick up
# another shared column. Countries without a match get a count of 0 and no
# ISO-2 string.
world_records <- st_as_sf(world) %>%
  left_join(sources_by_country, by = 'GID_0') %>%
  group_by(GID_0, NAME_0) %>%
  summarise(n_sources = n_distinct(source_name, na.rm = TRUE),
            iso_2_string = if (n_sources > 0) {
              paste(iso_2, collapse = ';')
            } else {
              NA_character_
            }) %>%
  ungroup() %>% st_cast() %>% st_set_crs(4326)

# Choropleth of the number of sources per country.
plot_figure_1 <- tm_shape(world_records %>%
           select(-iso_2_string) %>%
           # Zero counts become NA so those countries take the NA colour and
           # the '0' legend label.
           mutate(n_sources = ifelse(n_sources == 0,
                                     NA, n_sources))) +
  tm_polygons(fill = 'n_sources', fill_alpha = 0.9,
              col = 'grey40', col_alpha = 0.2,
              # NOTE(review): the upper break (979) is hard-coded; confirm it
              # still covers the largest per-country count when data change.
              fill.scale = tm_scale_intervals(n = 6,
                                              #style = 'jenks',
                                              breaks = c(1, 5, 10, 20, 100, 979),
                                              values = 'brewer.reds',
                                              value.na = 'grey80',
                                              label.na = '0'),
              fill.legend = tm_legend(item.space = 0, item.na.space = 0,
                                      title = 'Number of sources',
                                      reverse = TRUE,
                                      # frame=F,
                                      frame.lwd = 0.1,
                                      bg.color = 'white')) +
  tm_layout(legend.outside = TRUE,
            legend.position = c('left', 'bottom'), frame = FALSE) +
  tm_crs(property = 'global')

# Static rendering of the map.
tmap_mode('plot')
plot_figure_1

Global distribution of regional red list sources by country
Code
# Switch tmap to interactive mode and draw the same map object again.
tmap_mode('view')
plot_figure_1

Global distribution of regional red list sources by country

Taxonomic coverage

Code
# sources by kingdom
# Distinct sources per kingdom. A source covering several kingdoms is counted
# in each of them, so the total row can exceed the number of distinct sources.
metadata %>%
  group_by(kingdom) %>%
  summarise(`Number of sources` = n_distinct(source_name)) %>%
  arrange(desc(`Number of sources`)) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of sources by kingdom
kingdom Number of sources
Animalia 1434
Plantae 715
Fungi 101
Protozoa 6
Chromista 1
Total 2257
Code
# records by kingdom
# Row counts per kingdom, largest first, with a total row.
metadata %>%
  count(kingdom, name = 'Number of records') %>%
  arrange(desc(`Number of records`)) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of records by kingdom
kingdom Number of records
Animalia 2189
Plantae 899
Fungi 105
Protozoa 6
Chromista 1
Total 3200
Code
# top 5 animalia phyla
# NOTE(review): n() counts rows (records), whereas the column is labelled
# 'Number of sources' and the phylum bar chart uses n_distinct(source_name);
# confirm which is intended.
metadata %>%
  filter(!is.na(phylum), kingdom %in% c('Animalia')) %>%
  group_by(kingdom, phylum) %>%
  # Drop the grouping so slice_head() takes the five largest rows overall
  # instead of five per kingdom.
  summarise(n_sources_taxa = n(), .groups = 'drop') %>%
  arrange(desc(n_sources_taxa)) %>%
  slice_head(n = 5) %>%
  rename(`Number of sources` = n_sources_taxa) %>%
  # adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Top 5 number of sources by phylum of kingdom Animalia
kingdom phylum Number of sources
Animalia Arthropoda 978
Animalia Chordata 849
Animalia Mollusca 71
Animalia Annelida 11
Animalia Cnidaria 10
Code
# top 5 classes of phylum Chordata
# NOTE(review): n() counts rows (records), whereas the column is labelled
# 'Number of sources'; confirm which is intended.
metadata %>%
  filter(!is.na(class), phylum %in% c('Chordata')) %>%
  group_by(kingdom, class) %>%
  # Drop the grouping so slice_head() takes the five largest rows overall
  # instead of five per kingdom.
  summarise(n_sources_taxa = n(), .groups = 'drop') %>%
  arrange(desc(n_sources_taxa)) %>%
  slice_head(n = 5) %>%
  rename(`Number of sources` = n_sources_taxa) %>%
  # adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Top 5 number of sources by class of phylum Chordata
kingdom class Number of sources
Animalia Mammalia 202
Animalia Aves 185
Animalia Reptilia 148
Animalia Amphibia 133
Animalia Petromyzonti 9
Code
# top 5 classes of phylum Arthropoda
# NOTE(review): n() counts rows (records), whereas the column is labelled
# 'Number of sources'; confirm which is intended.
metadata %>%
  filter(!is.na(class), phylum %in% c('Arthropoda')) %>%
  group_by(kingdom, class) %>%
  # Drop the grouping so slice_head() takes the five largest rows overall
  # instead of five per kingdom.
  summarise(n_sources_taxa = n(), .groups = 'drop') %>%
  arrange(desc(n_sources_taxa)) %>%
  slice_head(n = 5) %>%
  rename(`Number of sources` = n_sources_taxa) %>%
  # adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Top 5 number of sources by class of phylum Arthropoda
kingdom class Number of sources
Animalia Insecta 854
Animalia Arachnida 47
Animalia Malacostraca 19
Animalia Branchiopoda 9
Animalia Isopoda 7
Code
# vertebrates
# Row counts for the four listed classes, with a total row.
# NOTE(review): n() counts rows (records) while the column is labelled
# 'Number of sources'; confirm which is intended.
metadata %>%
  filter(class %in% c('Mammalia', 'Aves', 'Reptilia', 'Amphibia')) %>%
  group_by(kingdom, class) %>%
  summarise(`Number of sources` = n()) %>%
  arrange(desc(`Number of sources`)) %>%
  adorn_totals('row') %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of sources of vertebrates
kingdom class Number of sources
Animalia Mammalia 202
Animalia Aves 185
Animalia Reptilia 148
Animalia Amphibia 133
Total - 668
Code
# sources of flora and fauna
# Distinct sources whose taxa label is exactly 'Fauna' or 'Flora'.
metadata %>%
  filter(taxa %in% c('Fauna', 'Flora')) %>%
  summarise(`Number of sources` = n_distinct(source_name)) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of sources of flora and fauna (multi-taxa)
Number of sources
520
Code
# plantae
# The five plant orders covered by the most distinct sources; rows without an
# order are left out.
metadata %>%
  filter(!is.na(order), kingdom == 'Plantae') %>%
  group_by(kingdom, order) %>%
  summarise(`Number of sources` = n_distinct(source_name)) %>%
  arrange(desc(`Number of sources`)) %>%
  slice_head(n = 5) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Top 5 number of sources by order of kingdom Plantae
kingdom order Number of sources
Plantae Charales 18
Plantae Asparagales 5
Plantae Arecales 4
Plantae Caryophyllales 2
Plantae Malvales 2
Code
# fish sources
# Distinct sources whose taxa label mentions 'fish' (any case), leaving out
# every label that mentions 'crayfish'.
metadata %>%
  filter(grepl('fish', taxa, ignore.case = TRUE),
         !grepl('crayfish', taxa, ignore.case = TRUE)) %>%
  summarise(`Number of fish sources` = n_distinct(source_name)) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of sources of fish
Number of fish sources
102
Code
# Bar chart of distinct sources per animal phylum, bars in decreasing order.
metadata %>%
  filter(!is.na(phylum), kingdom == 'Animalia') %>%
  group_by(kingdom, phylum) %>%
  summarise(n_sources_taxa = n_distinct(source_name)) %>%
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(vars(kingdom), scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels keep long phylum names readable.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Number of sources by phylum of kingdom Animalia
Code
# Bar charts of distinct sources per phylum, one panel each for Plantae and
# Fungi, with free axes per panel.
metadata %>%
  filter(!is.na(phylum), kingdom %in% c('Plantae', 'Fungi')) %>%
  group_by(kingdom, phylum) %>%
  summarise(n_sources_taxa = n_distinct(source_name)) %>%
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(vars(kingdom), scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels keep long phylum names readable.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Number of sources by phylum of kingdoms Plantae and Fungi
Code
# Bar chart of distinct sources per animal order, bars in decreasing order.
metadata %>%
  filter(!is.na(order), kingdom == 'Animalia') %>%
  group_by(kingdom, order) %>%
  summarise(n_sources_taxa = n_distinct(source_name)) %>%
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(vars(kingdom), scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels keep long order names readable.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Number of sources by order of kingdom Animalia
Code
# Bar charts of distinct sources per order, one panel each for Plantae and
# Fungi, with free axes per panel.
metadata %>%
  filter(!is.na(order), kingdom %in% c('Plantae', 'Fungi')) %>%
  group_by(kingdom, order) %>%
  summarise(n_sources_taxa = n_distinct(source_name)) %>%
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(vars(kingdom), scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Vertical tick labels keep long order names readable.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Number of sources by order of kingdoms Plantae and Fungi
Code
# Stacked horizontal bars: records per continent, split by kingdom
# (Animalia, Plantae and Fungi only). A record listing several continents
# (separated by '|') is counted in each of them.
metadata %>%
  filter(kingdom %in% c('Animalia', 'Plantae', 'Fungi')) %>%
  separate_rows(continent, sep = '\\|') %>%
  mutate(continent = str_squish(continent)) %>%
  count(continent, kingdom, name = 'count') %>%
  # Order continents by their stacked total (FUN = sum). reorder() defaults to
  # the mean of the per-kingdom counts, which misplaces a continent that lacks
  # one of the kingdoms.
  ggplot(aes(x = reorder(continent, -count, FUN = sum),
             y = count, fill = kingdom)) +
  geom_col(position = "stack") +
  labs(x = '', y = 'Number of records', fill = '') +
  # theme_minimal() +
  # scale_y_continuous(breaks = seq(0, 1500, by = 250), expand=c(0.01,0)) +
  coord_flip() +
  scale_fill_brewer(palette = "Set1") +
  ggpubr::theme_pubclean() +
  theme(legend.position = 'right')

Number of records by continent and kingdom

Temporal coverage

Code
# Distinct sources published from 2014 onwards, before 2014, or without a
# year, with each group's share of the three counts.
metadata %>%
  mutate(`Year of publication` = case_when(
    is.na(year) ~ 'No year of publication',
    year >= 2014 ~ 'In the last decade',
    .default = 'Older'
  )) %>%
  group_by(`Year of publication`) %>%
  summarise(`Number of sources` = n_distinct(source_name)) %>%
  arrange(desc(`Number of sources`)) %>%
  mutate(`%` = scales::label_percent()(
    `Number of sources` / sum(`Number of sources`)
  )) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Year of publication of the sources
Year of publication Number of sources %
Older 1073 50.8%
In the last decade 1029 48.7%
No year of publication 10 0.5%
Code
# Distinct sources published per year. Rows without a year are left out
# because they cannot be placed on the axis.
metadata %>%
  filter(!is.na(year)) %>%
  group_by(year) %>%
  summarise(publications_year = n_distinct(source_name)) %>%
  ggplot(aes(x = year, y = publications_year)) +
  geom_col(fill = "#4CAF50") +
  scale_x_continuous(n.breaks = 15) +
  # Zoom with coord_cartesian() rather than xlim()/ylim(): limits set through
  # xlim() are replaced by a later scale_x_continuous(), and scale limits drop
  # bars that reach past them.
  coord_cartesian(xlim = c(1975, 2025), ylim = c(0, 150)) +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean()

Number of regional red list sources published per year